# -*- coding: utf-8 -*-
"""
Spyder Editor
---
Author: Matt Ingram
* Adapted from Anselin and Rey (2014), and Sergio Rey's GitHub resources:
    https://github.com/sjsrey
Institution: University at Albany, SUNY
Project: Harbers and Ingram, for PSRM 
Date Created: June 12, 2017
Last Updated: November 8, 2018
---
Replication notes:
Main analysis conducted in R (see separate file);
This Python script generates LISA values using a conditional 
permutation approach advocated by Anselin (1995) and 
Anselin and Rey (2014).
---
"""

####################################################
# Set Environment
####################################################

# check version of python running
import sys
sys.version # here, showed python 3.5.2
# or
print (sys.version) # parentheses required in Python 3
# or
sys.version_info

####################################################
# Set Directory
####################################################
# check working directory
import os
import os.path
os.getcwd() # gets current working directory

# change directory
os.chdir("C:/Users/mi122167/Dropbox/Met Matt/PSRM2018/replicationmaterials2018v2")
os.getcwd()
#list contents
os.listdir(os.curdir) 

#####################################################
# Load Data
#####################################################

# check whether the path exists to the desired file
os.path.exists('./shapefiles/NAT2.SHP') # should result in "True" if file path exists
os.path.exists('./shapefiles/NAT2.DBF')
# note: calling directory remains unchanged; your basic python directory remains the same

# loading packages and libraries
import pysal as ps
import numpy as np
from pysal.weights.spatial_lag import lag_spatial as slag
from pysal.esda.smoothing import assuncao_rate
import scipy.stats as stats
ps.version
# should show most recent version
# pysal versions are released on semester basis (every 6 months) at start
ps.open.check()
# this checks that pysal opened correctly; should show list of file extensions

# load data
db = ps.open('./shapefiles/NAT2.dbf', 'r')
y = np.array([db.by_col('HR90')]).T
x_names = ['RD90','PS90','MA90','DV90','UE90','SOUTH']
x = np.array([db.by_col(var) for var in x_names]).T
y.shape
x.shape

###################################
# Constructing Spatial Weights
###################################

# for nearest neighbors, note that distance type (Euclidean, Manhattan, great circle) 
# can cause different results, as well as how ties are broken (random selection or full inclusion)
# distance options: p=1 (for Manhattan distance); p=2 (for Euclidean distance; default)
# or radius=pysal.cg.RADIUS_EARTH_MILES (or KM at end); use this if shapefile is unprojected (Anselin and Rey, 90)
# in pysal, all ties are broken by random selection
w10nnb = ps.knnW_from_shapefile("./shapefiles/NAT2.SHP", k=10,idVariable="FIPSNO") # Euclidean distance
#w10nnb = ps.knnW_from_shapefile("NAT2.SHP", k=10,p=1,idVariable="FIPSNO") # Manhattan distance
#w10nnb = ps.knnW_from_shapefile("NAT2.SHP", k=10,idVariable="FIPSNO",radius=pysal.cg.RADIUS_EARTH_MILES) # great circle distance
#quick checks
w10nnb.id_order[:3]
#w10nnb.weights[27077]
#w10nnb.neighbors[27077]
# compare different neighbors of 27077 between Euclidean option and great circle option

# transform from binary to row-standardized
w10nnb.transform = "R"
#w10nnb.weights[27077]


###########################
# creating spatial lags
###########################
# generic command structure:
# wy = pysal.lag_spatial(w,y) 
# where w is NxN weights matrix and y is Nxk array of k vectors to be lagged

# lagged DV; using y and w10nnb created above
ylag = ps.lag_spatial(w10nnb,y)
ylag.shape
# check first 5 rows of ylag
ylag[:5,]


##################################################
#
# permutation based LISAs of DV
#
##################################################

# because permutations are random, need to set seed in order to replicate results
# see pysal documentation, e.g., pp. 33, 40 (note 4)
np.random.seed(12345)

# LOCAL MORAN

# example from Sergio Rey at GitHub
#np.random.seed(10)
#w = ps.open(ps.examples.get_path("desmith.gal")).read()
#f = ps.open(ps.examples.get_path("desmith.txt"))
#y = np.array(f.by_col['z'])
#lm = ps.Moran_Local(y, w, transformation = "r", permutations = 99)
# also ps.esda.moran.Moran_Local(y,w,transformation="r",permutations=99)
#dir(lm)
#lm.q
# end example

# second example built on first
# this works using NAT data and weights file from sample data sets
#np.random.seed(10)
#w = ps.open(ps.examples.get_path("NAT2.gwt")).read()
# or
w = w10nnb
w.n
# provides dimension of w; should be 3085

f = ps.open("./shapefiles/NAT2.dbf", 'r')
y = np.array(f.by_col['HR90'])
y.shape
lm = ps.Moran_Local(y, w, transformation = "r", permutations = 999, geoda_quads=True)
dir(lm) # dir() shows all functions associated with object
lm.Is # local moran values
lm.q # quadrants
lm.p_sim # p-values based on permutations (one-sided)
lm.p_z_sim # p-values based on standard normal approximation from
            # permutations (one-sided)
            # for two-sided tests, these values should be multiplied by 2
I = lm.Is
I.shape
q=lm.q
q.shape
pperm = lm.p_sim*2 # two-sided
pperm.shape
pnorm = lm.p_z_sim*2 # two-sided
pnorm.shape

print (q) # python 3 requires parentheses
dir(q)
max(q)
np.min(q)
np.max(q)
np.median(q)

# define new var qsig=q and replace with 0 if p>.05
# export shapefile as csv using ogr

qsig1 = [] # brackets denote a list
qsig1
for i in range(len(q)):
    if pperm[i]<=0.05:
        qsig1.append(q[i])
    else:
        qsig1.append(0)
print (qsig1)
qsig1array = np.array(qsig1)
qsig1array
np.min(qsig1)
np.max(qsig1)
np.mean(qsig1)

qsig2 = [] # brackets denote a list
qsig2
for i in range(len(q)):
    if pnorm[i]<=0.05:
        qsig2.append(q[i])
    else:
        qsig2.append(0)
print (qsig2)

qsig2array = np.array(qsig2)
qsig2array
np.min(qsig2)
np.max(qsig2)
np.mean(qsig2)


# same results with following shorter sequence:
#alpha = .05
#qsig2 = lm.q.copy()
#yp = 2*lm.p_z_sim.copy()
#nsig = yp >  alpha
#qsig2[nsig] = 0

qsig1
qsig2

np.min(qsig1)
np.max(qsig1)
np.median(qsig1)
np.mean(qsig1)

np.min(qsig2)
np.max(qsig2)
np.median(qsig2)
np.mean(qsig2)

'''
# could also use variation of command
lm = ps.esda.moran.Moran_Local(y, w, transformation = "r", permutations = 999, geoda_quads=True)
'''

##################
# writing to .dbf  ## NOTE: does not recogninze module object; what am I missing?
##################
import pysal as ps
import numpy as np
import pandas as pd
import os
import ast
import shutil
from shutil import copyfile

# Define command "appendcol2dbf"
# could use raw code from GitHub for dataIO.py
# just specify Replace=False at start since not in command options
# NOTE: run everything from "def" to (db.close) comment # end define command
# 35 lines down
def appendcol2dbf(dbf_in, dbf_out, col_name, col_spec, col_data,
    replace=False):
# open the original dbf and create a new one with the new field
    db = ps.open(dbf_in)
    db_new = ps.open(dbf_out, 'w')
    db_new.header = db.header
    db_new.header.append(col_name)
    db_new.field_spec = db.field_spec
    db_new.field_spec.append(col_spec)

    # populate the dbf with the original and new data
    item = 0
    for rec in db:
        rec_new = rec
        rec_new.append(col_data[item])
        db_new.write(rec_new)
        item += 1

    # close the files
    db_new.close()
    db.close()

# the following text will delete the old dbf and replace it with the new
#one retaining the name of the original file.
    if replace is True:
        os.remove(dbf_in)
        os.rename(dbf_out, dbf_in)

# generate new version of revised shp and shx to match new dbf
    if not os.path.exists(dbf_out[:-4] + '.shp'):
        copyfile(dbf_in[:-4] + '.shp', dbf_out[:-4] + '.shp')
    if not os.path.exists(dbf_out[:-4] + '.shx'):
        copyfile(dbf_in[:-4] + '.shx', dbf_out[:-4] + '.shx')

# end command define        

# save to file
        # check working directory
os.getcwd()
from os import listdir
os.listdir(os.curdir)
# commands
dbf_in = './shapefiles/NAT2.dbf'
dbf_out = './shapefiles/NAT3.dbf'
col_name = 'ls.py.I'
col_spec = ('N',9,3) # defines numeric type, length, and 
# precision to 0 decimal places
db = ps.open(dbf_in)
n = I
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)

# check if new var added to new file        
db = ps.open('./shapefiles/NAT3.dbf', 'r')
db.header # new var added at end
np.array([db.by_col('ls.py.I')]).T.shape

'''
# copy shp and shx
if not os.path.exists(dbf_out[:-4] + '.shp'):
   copyfile(dbf_in[:-4] + '.shp', dbf_out[:-4] + '.shp')
if not os.path.exists(dbf_out[:-4] + '.shx'):
   copyfile(dbf_in[:-4] + '.shx', dbf_out[:-4] + '.shx')
'''

# add another var (qsig1)
dbf_in = './shapefiles/NAT3.dbf'
dbf_out = './shapefiles/NAT3-2.dbf'
col_name = 'ls.py.pperm'
col_spec = ('N',9,3) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = pperm
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-2.dbf', 'r')
db.header # new var added at end

'''
# copy shp and shx
if not os.path.exists(dbf_out[:-4] + '.shp'):
   copyfile(dbf_in[:-4] + '.shp', dbf_out[:-4] + '.shp')
if not os.path.exists(dbf_out[:-4] + '.shx'):
   copyfile(dbf_in[:-4] + '.shx', dbf_out[:-4] + '.shx')
'''

# add another var (qsig1)
dbf_in = './shapefiles/NAT3-2.dbf'
dbf_out = './shapefiles/NAT3-3.dbf'
col_name = 'ls.py.pnorm'
col_spec = ('N',9,3) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = pnorm
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-3.dbf', 'r')
db.header # new var added at end

'''
# copy shp and shx
if not os.path.exists(dbf_out[:-4] + '.shp'):
   copyfile(dbf_in[:-4] + '.shp', dbf_out[:-4] + '.shp')
if not os.path.exists(dbf_out[:-4] + '.shx'):
   copyfile(dbf_in[:-4] + '.shx', dbf_out[:-4] + '.shx')
'''

# add another var (qsig1)
dbf_in = './shapefiles/NAT3-3.dbf'
dbf_out = './shapefiles/NAT3-4.dbf'
col_name = 'ls.py.clperm'
col_spec = ('N',9,0) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = qsig1
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-4.dbf', 'r')
db.header # new var added at end

'''
# copy shp and shx
if not os.path.exists(dbf_out[:-4] + '.shp'):
   copyfile(dbf_in[:-4] + '.shp', dbf_out[:-4] + '.shp')
if not os.path.exists(dbf_out[:-4] + '.shx'):
   copyfile(dbf_in[:-4] + '.shx', dbf_out[:-4] + '.shx')
'''

# add another var (qsig1)
dbf_in = './shapefiles/NAT3-4.dbf'
dbf_out = './shapefiles/NAT3-5.dbf'
col_name = 'ls.py.clnorm'
col_spec = ('N',9,0) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = qsig2
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-5.dbf', 'r')
db.header # new var added at end


##################################################
#
# permutation-based LISAs of residuals
#
##################################################

db = ps.open('./shapefiles/NAT3-5.dbf', 'r')
db.header # new var added at end

res = np.array(db.by_col('ols2res'))
res.shape
lm3 = ps.Moran_Local(res, w, transformation = "r", permutations = 999, geoda_quads=True)
dir(lm) # dir() shows all functions associated with object
lm3.Is # local moran values
lm3.q # quadrants
lm3.p_sim # p-values based on permutations (one-sided)
lm3.p_z_sim # p-values based on standard normal approximation from
            # permutations (one-sided)
            # for two-sided tests, these values should be multiplied by 2
I = lm3.Is
I.shape
q=lm3.q
q.shape
pperm = lm3.p_sim*2 # two-sided
pperm.shape
pnorm = lm3.p_z_sim*2 # two-sided
pnorm.shape

print (q) # python 3 requires parentheses
dir(q)
max(q)
np.min(q)
np.max(q)
np.median(q)

# define new var qsig=q and replace with 0 if p>.05
# export shapefile as csv using ogr

qsig1res = [] # brackets denote a list
qsig1res
for i in range(len(q)):
    if pperm[i]<=0.05:
        qsig1res.append(q[i])
    else:
        qsig1res.append(0)
print (qsig1res)
qsig1resarray = np.array(qsig1res)
qsig1resarray
np.min(qsig1res)
np.max(qsig1res)
np.mean(qsig1res)

# add another var
dbf_in = './shapefiles/NAT3-5.dbf'
dbf_out = './shapefiles/NAT3-6.dbf'
col_name = 'ls.py.lresp'
col_spec = ('N',9,3) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = I
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-6.dbf', 'r')
db.header # new var added at end

# add another var
dbf_in = './shapefiles/NAT3-6.dbf'
dbf_out = './shapefiles/NAT3-7.dbf'
col_name = 'ls.py.presp'
col_spec = ('N',9,3) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = pperm
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-7.dbf', 'r')
db.header # new var added at end


# add another var
dbf_in = './shapefiles/NAT3-7.dbf'
dbf_out = './shapefiles/NAT3-8.dbf'
col_name = 'ls.py.qresp'
col_spec = ('N',9,0) # defines numeric type, length, and 
# precision to 3 decimal places
db = ps.open(dbf_in)
n = qsig1res
col_data = n
appendcol2dbf(dbf_in,dbf_out,col_name,col_spec,col_data,replace=False)
db = ps.open('./shapefiles/NAT3-8.dbf', 'r')
db.header # new var added at end


# add .prj component to shapefile
import shapefile as sf
filename = './shapefiles/NAT3-8'

# create the PRJ file
prj = open("%s.prj" % filename, "w")
epsg = 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
prj.write(epsg)
prj.close()

#end
